### Performs the individual level analysis (with matching, etc)
### Loads the extract of the survey data (2014 IBOPE)
### We first perform the matching and check balance, 
### Then analyze the matched data set using serveral different models
rm(list=ls(all=TRUE))
#setwd("~/pathwheredatafilesare")
library(Zelig)
library(ZeligChoice)
library(MatchIt)
library(car)
library(RItools)
library(xtable)


load("data_replic2014ibope.RData")


######################################################################################
####																			  ####
#### Matching and balance anlaysis									  	  			####
#### 	                                            							  ####
######################################################################################

### Missing data in income (probably not poor) and undecided voters...
tmp <- na.omit(new.data)

### Differences from 2010: 
### Don't have HDI of municipality, growth, and distance to cpaital city
### Included race, and region in PS
tmp$ps <- ps <- fitted(lm(bf~sexo+idade+escola+renda+metropolitan+race+region+munsize,data=tmp))
match.formula <- formula("bf~sexo+idade+escola+renda+metropolitan+race+region+munsize+ps")


### Check balance before matching!
	bal<-xBalance(match.formula,data=tmp,report="all")
	cat("Bowler and Hanson omnibus test\n");print(bal$overall)
	png(file="figure-rawbalance2014IBOPE.png")
	plot(bal,thecols=1,legend=F,mar=c(3,2,.5,.5),xaxt="n")
	abline(v=c(-.25,.25),lty=2)
	dev.off()

matching <- matchit(match.formula
                   ,data=tmp,method="genetic",replace=T,ratio=1,pop.size=125) 
                   
### Plot matching reuslts #########
pdf(file="figure-balimprove2014IBOPE.pdf")
plot(summary(matching,standardize=TRUE),interactive=F)
dev.off()

### Compute and save matching statistics ###
match.stats <- cbind(
	summary(matching)$sum.all[,1:2],
	summary(matching)$sum.matched[,1:2],
	Reduction=summary(matching)$reduction[,1]
)   

#Matching was done with replacement, so weights are different for eaach observation
#xBalance doesn't take weights, so use work-arournd to compute Bowler&Hansosn stats
forbal <- rbind(tmp[matching$match.matrix[,1],],	
			tmp[rownames(matching$match.matrix),])
bal<-xBalance(match.formula,data=forbal,report="all")
cat("Bowler and Hanson omnibus test\n");print(bal$overall)
match.stats <- list(match.stats,BowlerHanson=bal$overall)
balance<- list(president=match.stats)  #reorted in Table 01
pdf(file="figure-balance2014IBOPE.pdf")
plot(bal,thecols=1,legend=F,mar=c(3,2,.5,.5),xlim=c(-0.3,.3),xaxt="n")
abline(v=c(-.25,.25),lty=2)
dev.off()


########################################################################################
#
#  Linear probability models with bootstrapped standard errors and simple logit 
#  See http://cran.r-project.org/doc/contrib/Fox-Companion/appendix-bootstrapping.pdf
#
#######################################################################################
fit1 <- zelig(dilma ~   bf, model="logit",weights=matched.data$weights,data=matched.data,cite=F)
#fit1f <- zelig(lula ~   bf, model="logit",data=forbal,cite=F)
    bf0 <- setx(fit1 , bf=0)
    bf1 <- setx(fit1 ,bf=1)
	out <- sim(fit1,x=bf0,x1=bf1)
	
logit.2014 <- c('NonBenef'=out$stats[[1]][1],
				'DiffBenef'=out$stats[[5]][1],
				'BootSe'=out$stats[[5]][2],
				'BootCi'=out$stats[[5]][4:5])
				
fit2m <- lm(dilma ~  bf ,data=matched.data,weights=matched.data$weights)

boot.fct <- function(x,i){##function to bootstrap standard errors of "bf"
	tmp <- lm(dilma ~  bf ,data=x[i,],weights=x$weights)
	out <- coefficients(summary(tmp))["bf","Estimate"]
	return(out)
}

tmp <- boot(matched.data, boot.fct, R=1000,sim = "ordinary")
boot.se <- sd(tmp$t)
boot.ci <- quantile(tmp$t,prob=c(0.025,0.975))

lpm.bootse.2014 <- c('NonBenef'=as.numeric(fit2m$coef[1]),
				'DiffBenef'=as.numeric(fit2m$coef[2]),
				'BootSe'=as.numeric(boot.se),
				'BootCi'=as.numeric(boot.ci))

### Reported in Figure 01
output2014 <- list(lpm=lpm.bootse.2014,logit=logit.2014)










######################################################################################
####																			  ####
#### Second Matching (on high-low coverage) for UFMG       	  					  ####
#### 	                                            							  ####
######################################################################################

#### Get CCT coverage info by State, and create indicators ####
load('~/Dropbox/Data/Paper-BF-2014/data_bolsafamilia2014.RData')

uf<- data.frame(fam.cct=as.matrix(by(dd$ fam.2014.cct,dd$state,sum,na.rm=T)),
		   tot.fam=as.matrix(by(dd$ tot.fam,dd$state,sum,na.rm=T)))
uf$cct <- uf$fam.cct/uf$tot.fam
uf <- uf[order(uf$cct),]
uf$highcct <- uf$cct>=.40
uf$lowcct <- uf$cct<=.20
uf <- subset(uf,select=c(cct,highcct,lowcct))
uf$state <- as.character(rownames(uf))
new.data$state <- as.character(new.data$state)
new.data <- merge(new.data,uf,by="state",all.x=T)

### Keep only people in "high" and "low" coverage states
new.data <- subset(new.data,highcct==T|lowcct==T)

### Keep only people without missing data in income (probably not poor) and undecided voters...
tmp <- na.omit(new.data)

### Compute propoensity score (can't use region here, because linked to "treatment")
tmp$ps <- ps <- fitted(lm(highcct~sexo+idade+escola+renda+metropolitan+race+munsize,data=tmp))

### Can't use region here, because linked to "treatment"
match.formula <- formula("highcct~sexo+idade+escola+renda+metropolitan+race+munsize+ps")
matching <- matchit(match.formula
                   ,data=tmp,method="genetic",replace=T,ratio=1,pop.size=125) 
                   
#save(matching,file="matchingoutput-IBO2014-cctcoverage.RData")
#load("matchingoutput-IBO2014-cctcoverage.RData")

### Plot matching reuslts #########
pdf(file="Figures/figure-balimprove2014IBOPE-cctcoverage.pdf")
plot(summary(matching,standardize=TRUE),interactive=F)
dev.off()

### Compute and save matching statistics ###
match.stats <- cbind(
	summary(matching)$sum.all[,1:2],
	summary(matching)$sum.matched[,1:2],
	Reduction=summary(matching)$reduction[,1]
)   
forbal <- rbind(tmp[matching$match.matrix[,1],],	#xBalance doesn't take weights, work aournd
			tmp[rownames(matching$match.matrix),])
bal<-xBalance(match.formula,data=forbal,report="all")
pdf(file="Figures/figure-balance2014IBOPE-cctcoverage.pdf")
plot(bal,thecols=1,legend=F,mar=c(3,2,.5,.5),xlim=c(-0.3,.3),xaxt="n")
abline(v=c(-.25,.25),lty=2)
dev.off()
cat("Bowler and Hanson omnibus test\n");print(bal$overall)
match.stats <- list(match.stats,BowlerHanson=bal$overall)
balance<- list(president=match.stats)
save(balance,file="output-balance2014IBOPE-cctcoverage.RData")

matched.data <- match.data(matching)
matched.data$dilma <- as.numeric(as.character(matched.data$dilma))
matched.data$renda <- factor(matched.data$renda) 

fit1 <- zelig(dilma ~   highcct, model="logit",weights=matched.data$weights,data=matched.data,cite=F)
    bf0 <- setx(fit1 , highcct=0)
    bf1 <- setx(fit1, highcct=1)
	out <- sim(fit1,x=bf0,x1=bf1)
	
logit.2014 <- c('NonBenef'=out$stats[[1]][1],
				'DiffBenef'=out$stats[[5]][1],
				'BootSe'=out$stats[[5]][2],
				'BootCi'=out$stats[[5]][4:5])
				
fit2m <- lm(dilma ~  highcct ,data=matched.data,weights=matched.data$weights)
boot.fct <- function(x,i){##function to bootstrap standard errors of "bf"
	tmp <- lm(dilma ~  highcct ,data=x[i,],weights=x$weights)
	out <- coefficients(summary(tmp))["highcctTRUE","Estimate"]
	return(out)
}
tmp <- boot(matched.data, boot.fct, R=1000,sim = "ordinary")
boot.se <- sd(tmp$t)
boot.ci <- quantile(tmp$t,prob=c(0.025,0.975))

lpm.bootse.2014 <- c('NonBenef'=as.numeric(fit2m$coef[1]),
				'DiffBenef'=as.numeric(fit2m$coef[2]),
				'BootSe'=as.numeric(boot.se),
				'BootCi'=as.numeric(boot.ci))

output2014 <- list(lpm=lpm.bootse.2014,logit=logit.2014)
save(output2014,file="output-2014cctcoverage.RData")


